#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Time    : 2018/1/27 0027 12:34
# @Author  : Arliki
# @email   : hkdnxycz@outlook.com
# @File    : daomu
import scrapy
from redis import Redis
from scrapy_redis.spiders import RedisSpider


class DaomuSpider(RedisSpider):
    """Collect article links from category pages and queue them in Redis.

    The spider is a scrapy-redis ``RedisSpider``; ``redis_key`` names the
    Redis key associated with its start URLs (see the scrapy-redis
    documentation for how that key is consumed).

    Crawl flow:
      1. ``parse`` follows the link of every taxonomy menu item found on
         the start page.
      2. ``push_link`` pushes the link of every article excerpt found on
         those pages onto the Redis list ``daomu:start_link``.
    """

    name = 'daomu'
    redis_key = 'daomu:start_url'

    def __init__(self, *args, **kwargs):
        """Initialise the spider.

        :param domain: optional keyword argument holding a comma-separated
            list of allowed domains; it is removed from ``kwargs`` before
            they are forwarded to the parent constructor.
        """
        domain = kwargs.pop('domain', '')
        # Build a real list: on Python 3 ``filter`` returns a one-shot
        # iterator, which would be exhausted after its first traversal.
        self.allowed_domains = list(filter(None, domain.split(',')))
        super(DaomuSpider, self).__init__(*args, **kwargs)

    def parse(self, response):
        """Yield one request per taxonomy menu item on the page.

        :param response: response for a start URL.
        :return: generator of ``scrapy.Request`` objects whose callback is
            :meth:`push_link`.
        """
        menu_items = response.xpath(
            '//li[starts-with(@class,"menu-item menu-item-type-taxonomy")]'
        )
        for item in menu_items:
            href = item.xpath('a/@href').extract_first()
            if not href:
                # Menu entry without a direct <a href>; nothing to follow.
                continue
            # urljoin leaves absolute URLs untouched and resolves relative
            # ones, which scrapy.Request would otherwise reject.
            yield scrapy.Request(
                response.urljoin(href),
                callback=self.push_link,
                dont_filter=True,
            )

    def push_link(self, response):
        """Push every article link on the page onto a Redis list.

        Links are left-pushed onto ``daomu:start_link`` using a ``Redis``
        client created with its default connection settings.

        :param response: response for a category page.
        :return: generator yielding ``None`` once per pushed link; no
            items or requests are produced.
        """
        r = Redis()
        for article in response.xpath('//article[@class="excerpt excerpt-c3"]'):
            href = article.xpath('a/@href').extract_first()
            if not href:
                # Excerpt without a direct <a href>; skip instead of failing.
                continue
            r.lpush('daomu:start_link', response.urljoin(href))
            # Kept from the original: the callback stays a generator and
            # emits nothing meaningful.
            yield None
